* Reset the session before building the panel.
clear all
set matsize 2000
set more off

* Root folder of the replication files; edit this path to match the local machine.
global filepath "D:\Dropbox\NREGA Aggregate\shared NREGA lights\replication files"

* Every input file below is opened relative to this working directory.
cd "$filepath\raw data and cleaning"

* Master dataset that the merges below are attached to.
use "raw lights.dta", clear


*-----------------------------------------------------------------
* Merge variables onto the lights data
* All merges are 1:1 and discard the _merge indicator (nogen), so
* unmatched rows from either side stay in memory unless dropped.
*-----------------------------------------------------------------

* NREGS indicators: only nr06, nr07 and nr08 are taken from the using file.
merge 1:1 sno using "nrega_wide.dta", nogen keepusing(nr06 nr07 nr08)
drop if sno==.
rename st_cen_cd st

* Latitude and longitude, for spatial standard errors.
* Rows with missing fid are removed before fid is used as the 1:1 merge key;
* rows left without sno after the merge are removed as well.
drop if fid==.
merge 1:1 fid using "2011_Dist.dta", nogen keepusing(lat lon)
drop if sno==.

* Mean growth rate of lights between 2000 and 2005; .01 is added to both
* levels so that a zero 2000 value does not produce a division by zero.
* NOTE(review): the exponent is 1/6 although 2000-2005 spans five
* year-to-year intervals - confirm this is intended.
gen dlt00_05 = ((avglt2005 + .01)/(avglt2000 + .01))^(1/6) - 1

* Pre-period light level, taken as the 2005 value.
gen pre_meanlt = avglt2005

* RGGVY: indicator equal to 1 when fund_sanc is positive and not system missing.
merge 1:1 sno using "rggvy.dta", nogen
gen rggvy = (fund_sanc>0 & fund_sanc!=.)

* Deposits, for the appendix.
merge 1:1 sno using "deposits 2005.dta", nogen
rename yr_dep dep_samp

* Backwards district criteria.
merge 1:1 sno using "backwards district criteria.dta", nogen




*-----------------------------------------------------------------
* Save a wide copy to merge into the deposit data
* preserve/restore keeps the in-memory data untouched, so the
* census_no filter applies only to the file written here.
*-----------------------------------------------------------------
preserve
keep if census_no!=.
save "lights_wide.dta", replace
restore

*-----------------------------------------------------------------
* Long data: one row per sno-year
*-----------------------------------------------------------------
reshape long avglt sumlt jobcards, i(sno) j(year)

*-----------------------------------------------------------------
* NREGS treatment indicator
* Equals 1 from 2006 onward where nr06==1, from 2007 onward where
* nr07==1, and from 2008 onward where nr08==1; 0 otherwise.
*-----------------------------------------------------------------
gen nregs = (year>=2006 & nr06==1) | (year>=2007 & nr07==1) | (year>=2008 & nr08==1)

*-----------------------------------------------------------------
* Cleaning
*-----------------------------------------------------------------
* Drop Daman/Diu (sno 126 and 127).
drop if inlist(sno, 126, 127)

* Restrict the time frame to 2000-2013.
keep if inrange(year, 2000, 2013)




* State indicator (disabled; st is created earlier by renaming st_cen_cd):
*encode state_name, gen(st)

* Drop singletons and build the 2000-2013 base sample for backwards districts.
* Rows that end up with a missing residual from the first regression are
* treated as outside the sample and removed.
* Original author's note: remove the "resid" option below when running a
* Stata version older than 16.
* NOTE(review): in current reghdfe releases "predict, residuals" appears to
* require the resid option - confirm before removing it.
reghdfe avglt nregs, absorb(dis_fe=i.sno state_by_year=i.year#i.st) cluster(sno) resid
predict e, residuals

* bs marks observations with a non-missing residual.
gen bs = (e!=.)

* Re-estimate on the marked sample, then keep only those rows.
reghdfe avglt nregs if bs==1, absorb(sno year#st) cluster(sno)
keep if bs==1

* Standardize lights for interpretation.
egen std_lt = std(avglt)

* Drop unused variables: merged-in fields that are no longer needed ...
drop district st_nm fid var7 var8 dt_cen_cd area si state plan fund_sanc fund_rel censuscode report_no var5 var6
* ... regression by-products created in this section ...
drop _reghdfe_resid dis_fe state_by_year e bs
* ... and the reshaped series that are not kept in the panel.
drop jobcards sumlt



* Variable labels, grouped by theme.

* Identifiers
label variable sno "district id"
label variable census_no "district census id"
label variable district_name "district name"
label variable st "state id"
label variable year "year"
label variable lat "latitude"
label variable lon "longitude"

* NREGS rollout
label variable nregs "NREGS"
label variable nr06 "wave 1"
label variable nr07 "wave 2"
label variable nr08 "wave 3"

* Nighttime lights
label variable avglt "district avg. nighttime light"
label variable std_lt "Std. Nighttime Light"
label variable dlt00_05 "mean growth rate lights, 2000-2005"
* NOTE(review): pre_meanlt is generated as avglt2005 earlier in this file,
* while the label below describes a 2000-2005 mean - confirm which is intended.
label variable pre_meanlt "mean level of lights, 2000-2005"

* Other district characteristics
label variable state_frac "sc/st frac."
label variable wage "agr. wage"
label variable outputwage "output per agr. worker"
label variable rggvy "RGGVY, 10th plan"
label variable dep_samp "marker for deposit sample"


* Move the main variables to the front of the dataset.
order sno census_no state_name district_name year nregs nr06-nr08 avglt std_lt rggvy wage outputwage state_frac dlt00_05 pre_meanlt st 

* Save the lights panel in the replication root folder.
save "$filepath\district light panel.dta", replace
